Differential binding of the three PURA HeLa iCLIPs - 3 flag vs oe

Author

Melina Klostermann

Published

November 3, 2023

Show code
install.packages("ggplot2")
#install.packages("ggpubr")
install.packages("ggrastr")
install.packages("ggpointdensity")
install.packages("viridis")

BiocManager::install("BindingSiteFinder")
BiocManager::install("GenomicFeatures")
BiocManager::install("DESeq2")

1 Input

Show code
# path for output
outpath <- "~/Thesis/Thesis_code/PURA/07_Differential_binding/"

bds <- readRDS(paste0(outpath, "bds_merge_flag_oe.rds"))

bds
Dataset:  flag_oe
Object of class BSFDataSet 
#N Ranges:  147,893 
Width ranges:  5 
#N Samples:  6 
#N Conditions:  2 
Show code
# gene annotation
annotation <- readRDS("~/PURA/Molitor-et-al-2022/annotation.rds")
anno_txdb <- makeTxDbFromGRanges(annotation)

gns = genes(anno_txdb)
gns$gene_id = sub("\\..*", "", gns$gene_id)
idx = match(gns$gene_id, annotation$gene_id)
elementMetadata(gns) = cbind(elementMetadata(gns), elementMetadata(annotation)[idx,])
names(gns) = sub("\\..*", "", names(gns))
meta = data.frame(gene_id = gns$gene_id, gene_name = gns$gene_name, gene_type = gns$gene_type)
mcols(gns) = meta
gns$geneID = names(gns)

cdseq = cds(anno_txdb)
intrns = unlist(intronsByTranscript(anno_txdb))
utrs3 = unlist(threeUTRsByTranscript(anno_txdb))
utrs5 = unlist(fiveUTRsByTranscript(anno_txdb))
trl = GRangesList(CDS = cdseq, Intron = intrns, UTR3 = utrs3, UTR5 = utrs5)

# saveRDS(trl, paste0(outpath, "transcript_region_list_test.rds"))

2 Assign genes and regions

Show code
bds <- assignToGenes(bds, anno.genes = gns, overlaps = "frequency")

# saveRDS(bds, paste0(outpath, "bds_test.rds"))
#bds <- assignToTranscriptRegions(bds, anno.transcriptRegionList = trl, overlaps.rule = c("UTR3", "UTR5", "CDS", "Intron"))

# bds <- assignToTranscriptRegions(bds, anno.transcriptRegionList = trl, overlaps = "frequency")
# getRanges(bds)

3 Calculate background

Show code
# compute the binding site and background coverage
bds = calculateBsBackground(bds, anno.genes = gns)

# filter background 
bds = filterBsBackground(bds,
                          minCounts.cutoff = 1000,
                         balanceBackground.cutoff.bs = 0.2,
                         balanceBackground.cutoff.bg = 0.8)

plotBsBackgroundFilter(bds, filter = "minCounts")

Show code
plotBsBackgroundFilter(bds, filter = "balanceBackground")

Show code
plotBsBackgroundFilter(bds, filter = "balanceCondition")

4 Caluclate Changes

Show code
bds <- calculateBsFoldChange(bds)

5 Changes

5.1 binding changes

Show code
plotBsMA(bds, what = "bg")

Show code
plotBsVolcano(bds, what = "bg")

5.2 background changes

Show code
plotBsMA(bds, what = "bs")

Show code
plotBsVolcano(bds, what = "bs")

Show code
bs <- getRanges(bds) %>% as.data.frame()

Number of binding sites: 146478

Number of sig changing binding sites: 15425

Number of sig changing background: 34122

5.3 binding vs background changes

Show code
bs <- bs %>% mutate(bs, bin = cut(bg.log2FoldChange, breaks = seq(-3,3, 0.5)))

ggplot(bs, aes(x = bin, y = bs.log2FoldChange))+
  geom_violin()+
  geom_boxplot(width = 0.25)

6 Save file

Show code
# rds
saveRDS(bs, paste0(outpath, "merged_bs_diff_flag_oe_res.rds"))

# beds
up_bs <- bs %>% subset((bs$bs.padj < 0.01) & (bs$bs.log2FoldChange > 0)) %>%
  makeGRangesFromDataFrame(.)

down_bs <- bs %>% subset((bs$bs.padj < 0.01) & (bs$bs.log2FoldChange < 0))%>%
  makeGRangesFromDataFrame(.)

up_bg <- bs %>% subset((bs$bg.padj < 0.01) & (bs$bg.log2FoldChange > 0)) %>%
  makeGRangesFromDataFrame(.)

down_bg <- bs %>% subset((bs$bg.padj < 0.01) & (bs$bg.log2FoldChange < 0))%>%
  makeGRangesFromDataFrame(.)

rtracklayer::export.bed(up_bs, paste0("merged_bs_upbs_oevsflag.bed"))
rtracklayer::export.bed(down_bs, paste0("merged_bs_downbs_oevsflag.bed"))
rtracklayer::export.bed(up_bg, paste0("merged_bs_upbg_oevsflag.bed"))
rtracklayer::export.bed(down_bg, paste0("merged_bs_downbg_oevsflag.bed"))